import pandas as pd

# The three provider-assignment strategies; main() picks one per tract based on its changetype.
def dirAssign(tdf):
    """Return the provider count of the highest-weighted contributing row.

    Args:
        tdf: DataFrame with a numeric 'HUPCT00' weight column and a 'tprov'
            provider-count column.

    Returns:
        The 'tprov' value of the first row whose 'HUPCT00' equals the column
        maximum. Missing (NaN) weights are ignored when finding the maximum.

    Raises:
        ValueError: if ``tdf`` has no rows.
        IndexError: if no row matches the maximum (e.g. every weight is NaN).
    """
    if len(tdf.index) == 0:
        raise ValueError('dirAssign() requires at least one row')

    weights = tdf['HUPCT00']
    # Series.max() skips NaN. The builtin max() is order-dependent when a NaN
    # is present (a leading NaN "wins"), which then matches no row at all.
    top = tdf.loc[weights == weights.max(), 'tprov']
    return top.iloc[0]

def maxWeight(tdf):
    """Return the largest provider count among the highest-weighted rows.

    Args:
        tdf: DataFrame with a numeric 'HUPCT00' weight column and a 'tprov'
            provider-count column.

    Returns:
        The maximum 'tprov' over all rows whose 'HUPCT00' equals the column
        maximum (so ties on weight are broken by the larger provider count).
        Missing (NaN) values are ignored in both maxima.

    Raises:
        ValueError: if ``tdf`` has no rows.
    """
    if len(tdf.index) == 0:
        raise ValueError('maxWeight() requires at least one row')

    weights = tdf['HUPCT00']
    # Series.max() skips NaN; the builtin max() gives an order-dependent
    # answer when a NaN is present.
    top = tdf.loc[weights == weights.max(), 'tprov']
    return top.max()
    
def weightSum(tdf):
    """Return the weighted sum of provider counts: sum(HUPCT00 * tprov).

    Args:
        tdf: DataFrame with numeric 'HUPCT00' and 'tprov' columns.

    Returns:
        The sum over all rows of ``HUPCT00 * tprov``; 0 for a frame with no
        rows. A NaN in either column makes the result NaN.

    NOTE(review): the weight is used exactly as stored. If 'HUPCT00' holds
    percentages (0-100) rather than fractions, the result is scaled by 100 --
    confirm against the crosswalk file.
    """
    products = tdf['HUPCT00'] * tdf['tprov']
    # skipna=False so a missing weight or count still surfaces as NaN instead
    # of being silently dropped from the total.
    return products.sum(skipna=False)
    
def _changeType(sources, rowsPer2000):
    """Classify one 2010 tract from the 2000 tracts that contribute to it.

    Args:
        sources: iterable of GEOID00 values, one per crosswalk row of the
            2010 tract.
        rowsPer2000: dict mapping GEOID00 -> number of crosswalk rows that
            carry it (presumably the number of 2010 tracts it feeds, assuming
            one row per 2000/2010 pair -- confirm against the crosswalk).

    Returns:
        1: single 2000 input that feeds only this 2010 tract.
        3: single 2000 input that feeds more than one 2010 tract.
        2: several 2000 inputs, none of which feeds another 2010 tract.
        4: several 2000 inputs, at least one of which feeds another 2010 tract.
        0: nothing could be determined (no usable contributor).
    """
    fanOut = [rowsPer2000.get(geoid00, 0) for geoid00 in sources]

    if len(fanOut) == 1:
        if fanOut[0] == 1:
            return 1
        if fanOut[0] > 1:
            return 3
        return 0

    # Every contributor has to be inspected: one split contributor is enough
    # to make the relationship type 4, regardless of its row position.
    if any(count > 1 for count in fanOut):
        return 4
    if any(count == 1 for count in fanOut):
        return 2
    return 0


def _tractProviders(tractdf, geoid00):
    """Return the integer 'total_prov' for one 2000 tract, or -1 if unusable.

    -1 is returned when the tract has no row or more than one row in
    ``tractdf``, or when its value cannot be converted to an int.
    """
    matches = tractdf.loc[tractdf['tract_fips'] == geoid00, 'total_prov']
    if len(matches) != 1:
        return -1
    try:
        return int(matches.iloc[0])
    except (TypeError, ValueError, OverflowError):
        return -1


def main(CWref, inData, provCol='prov08',
         tempOut=r'D:\Work\BIDS\Final Scripts\2008-2010\tempResults.csv',
         resultsOut=r'D:\Work\BIDS\Final Scripts\2008-2010\results08.csv'):
    """Assign changetypes and provider counts to 2010 tracts and write CSVs.

    Args:
        CWref: path to the 2000->2010 crosswalk CSV. Must contain the columns
            'GEOID00', 'GEOID10' and 'HUPCT00'.
        inData: path to the provider CSV. Must contain the columns
            'tract_fips' and 'total_prov'.
        provCol: name of the result column to fill (year of interest).
        tempOut: where the intermediate TRACT10/changetype table is written.
        resultsOut: where the final table is written.

    The three trailing parameters default to the values that used to be
    hard-coded, so ``main(CWref, inData)`` behaves as before.

    Changetype 2 vs 4 is decided from *all* contributing 2000 tracts; a tract
    is type 4 as soon as any contributor also feeds another 2010 tract.

    NOTE(review): a contributor with no usable provider count is carried as
    -1 and takes part in the max / weighted sum like any other value --
    confirm that this sentinel is intended to flow into the results.
    """
    crosswalk = pd.read_csv(CWref)

    # remove 0 contribution entries
    df = crosswalk[crosswalk['HUPCT00'] > 0]

    # number of crosswalk rows per 2000 tract, computed once for all lookups
    rowsPer2000 = df['GEOID00'].value_counts().to_dict()

    # get the unique 2010 geographies and classify each of them
    tracts10 = df['GEOID10'].unique()
    changeTypes = [
        _changeType(df.loc[df['GEOID10'] == tract10, 'GEOID00'], rowsPer2000)
        for tract10 in tracts10
    ]
    finaldf = pd.DataFrame({'TRACT10': tracts10, 'changetype': changeTypes})

    # changetype results are written here, before assigning providers
    finaldf.to_csv(tempOut)

    # mixed strategy to assign providers
    df = finaldf
    df['prov08'] = 0
    df['prov09'] = 0
    df['prov10'] = 0
    if provCol not in df.columns:
        df[provCol] = 0

    # reference table: the unfiltered crosswalk without the unneeded columns
    # (tolerate their absence instead of failing with KeyError)
    refdf = crosswalk.drop(columns=['changetype', 'Num_GEOID00'],
                           errors='ignore')

    # load the tract df
    tractdf = pd.read_csv(inData)

    provCounts = []
    for tid, tchange in zip(df['TRACT10'], df['changetype']):
        # account for missing info
        if tchange == -1:
            provCounts.append(-1)
            continue

        # find the 2000 contribution tracts
        tdf = refdf[refdf['GEOID10'] == tid].reset_index(drop=True)
        if len(tdf.index) == 0:
            provCounts.append(0)
            continue

        # add the providers from the old geographies
        tdf['tprov'] = [_tractProviders(tractdf, geoid00)
                        for geoid00 in tdf['GEOID00']]

        # assign once, after every contributor's provider count is known
        if tchange == 1:
            provCounts.append(dirAssign(tdf))
        elif tchange == 2:
            provCounts.append(maxWeight(tdf))
        elif tchange == 3:
            provCounts.append(weightSum(tdf))
        elif tchange == 4:
            if len(tdf.index) == 1:
                provCounts.append(weightSum(tdf))
            else:
                provCounts.append(maxWeight(tdf))
        else:
            provCounts.append(0)

    # set the whole column at once so pandas picks a dtype that fits both the
    # integer and the weighted (float) results
    df[provCol] = provCounts

    # write results
    df.to_csv(resultsOut)

if __name__ == '__main__':
    # Script entry point: edit the two input paths below before running.
    crosswalkPath = r'D:\Work\BIDS\Final Scripts\2008-2010\crosswalk_2000_2010_household.csv'
    providerPath = r'D:\Work\BIDS\Final Scripts\2008-2010\hs_mapdata_dec_2008.csv'
    main(CWref=crosswalkPath, inData=providerPath)